/***
This do-file compares the distribution of wages in the Earnin data with the CPS.
***/

*-------------------------------------------------------------------------------
* Set up
*-------------------------------------------------------------------------------

* Set $root 
* The project command is called first; the next line then reads r(buildrunning)
* and, when it equals 0, includes the interactive configuration file.
* NOTE(review): presumably project defines $root and r(buildrunning)==0 means the
* file is being run outside a project build - confirm against the project package.
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
* set_globals.do is first declared as an input of this do-file and then included,
* so whatever it defines is available to the code below.
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"
* Name of the sub-folder of "results/paper numbers" used for this file's output
local category "Employment"

* Create directories
* cap (capture) keeps the script running if mkdir fails, e.g. because the
* directory already exists.
cap mkdir "${root}/results/paper numbers"
cap mkdir "${root}/results/paper numbers/`category'"


*-------------------------------------------------------------------------------
* 1 - Import Earnin
*-------------------------------------------------------------------------------

* Declare the Earnin median-wage file as an input and load it
project, uses("${root}/data/dvc/Earnin/historical/Earnin-median.dta")
use "${root}/data/dvc/Earnin/historical/Earnin-median.dta", clear

* Detailed summary statistics leave the quartiles in r(p25), r(p50) and r(p75)
summarize jan2020_wage, detail

* Store the three quartiles in locals earnin_wage_p25, earnin_wage_p50 and
* earnin_wage_p75; they are used again when the scalars are exported
local earnin_stats p25 p50 p75
foreach stat of local earnin_stats {
	local earnin_wage_`stat' = `r(`stat')'
}

*-------------------------------------------------------------------------------
* 2 - Import CPS and apply sample restrictions
*-------------------------------------------------------------------------------

* Same processing as in figs/Employment/Create CPS by wage quartile series.do

* Load CPS data (declared as an input of this do-file first)
project, uses("${root}/data/dvc/CPS/cps_00037.dta")
use "${root}/data/dvc/CPS/cps_00037.dta", clear

* Sample restrictions
* age and year must be fully populated, otherwise the script stops here
assert !missing(age) & !missing(year)
* Remove respondents younger than 16
drop if age < 16
* Remove every observation that is not from January 2020
drop if year != 2020 | month != 1

* Create NAICS
* Each rule reads "<NAICS sector> <lowest ind code> <highest ind code>".
* Rules are applied in the order listed; observations whose ind falls in
* none of the ranges keep a missing naics.
gen naics = .
foreach rule in ///
	"11 0170 0290" "21 0370 0490" "23 0770 0770" ///
	"31 1070 1790" "32 1870 2590" "33 2670 3990" ///
	"42 4070 4590" "44 4670 5190" "45 5275 5790" ///
	"48 6070 6290" "49 6370 6390" "22 0570 0690" ///
	"51 6470 6780" "52 6870 6992" "53 7070 7190" ///
	"54 7270 7490" "55 7570 7570" "56 7580 7790" ///
	"61 7860 7890" "62 7970 8470" "71 8560 8590" ///
	"72 8660 8690" "81 8770 9290" "92 9370 9890" {
	* Split the rule into sector, lower bound and upper bound
	tokenize `rule'
	replace naics = `1' if inrange(ind, `2', `3')
}
						
* Be consistent with PIE and CES series
* naics_code is the string sector label. Sectors that are reported jointly
* (manufacturing 31-33, retail trade 44-45, transportation and warehousing 48-49)
* get a hyphenated label. The hyphen matters: the super-sector mapping matches on
* "31-33", "44-45" and "48-49", and the reformatting step that follows it turns
* "-" into "_". Labels without the hyphen ("3133", ...) would leave these sectors
* without a super sector and make the reformatting step a no-op.
* Sector 92 is intentionally given no label here; its naics_code stays "".
gen naics_code = ""

* Sectors whose label is simply the two-digit NAICS code
foreach sector in 11 21 22 23 42 51 52 53 54 55 56 61 62 71 72 81 {
	replace naics_code = "`sector'" if naics == `sector'
}

* Sectors that are combined into one label
replace naics_code = "31-33" if inlist(naics, 31, 32, 33)
replace naics_code = "44-45" if inlist(naics, 44, 45)
replace naics_code = "48-49" if inlist(naics, 48, 49)
				
* Drop some sectors according to BLS adjustment 
drop if naics == 92 		// drop those working in public sector to match CES (Total Private Employment)
drop if naics == 11 		// drop those working in agriculture, forestry, fishing, and hunting according to BLS adjustment of CPS to CES
* 9290 is an industry (ind) code, not a NAICS sector: naics only ever holds the
* two-digit sectors assigned above, and ind 9290 is folded into sector 81 there,
* so this filter has to be applied to ind to have any effect.
drop if ind == 9290 		// drop workers in private households such as nannies, housekeepers, etc.
			
* Drop some classes of workers according to BLS adjustment
* NOTE(review): in the IPUMS CPS codebook classwkr 29 appears to be "unpaid family worker"
* rather than a public sector category - confirm; it is dropped either way.
drop if inlist(classwkr, 0, 13, 25, 26, 27, 28, 29) 	// drop missing (0), unincorporated, self-employed (13), and all public sector employees (25-29) 
			
* Keep those with jobs 
* Only observations with empstat equal to 10 remain after this line
keep if empstat == 10
			
* Convert to super sector
* naics_ss starts out empty and is filled by exact string matches on naics_code.
* The combined sectors are matched on their hyphenated labels ("31-33", "44-45",
* "48-49"), so naics_code has to carry exactly those labels for these rules to fire.
gen naics_ss = ""
replace naics_ss = "10" if naics_code == "11" | naics_code == "21"
replace naics_ss = "20" if naics_code == "23"
replace naics_ss = "30" if naics_code == "31-33"
replace naics_ss = "40" if naics_code == "42" | naics_code == "44-45" | naics_code == "48-49" | naics_code == "22"
replace naics_ss = "50" if naics_code == "51"
replace naics_ss = "55" if naics_code == "52" | naics_code == "53"
replace naics_ss = "60" if naics_code == "54" | naics_code == "55" | naics_code == "56"
replace naics_ss = "65" if naics_code == "61" | naics_code == "62"
replace naics_ss = "70" if naics_code == "71" | naics_code == "72"
replace naics_ss = "80" if naics_code == "81"

* Reformat NAICS codes
* Every hyphen in the label becomes an underscore (e.g. "31-33" turns into "31_33");
* this runs after the super-sector mapping, which still needs the hyphens.
replace naics_code = subinstr(naics_code, "-", "_", .)
			
* Define hourly wages
* Start from a clean slate in case a wage variable already exists
capture drop wage

* Blank out values above the thresholds below
* NOTE(review): presumably these are the top / not-in-universe codes of the CPS
* extract - confirm against the codebook
replace earnweek  = . if earnweek  > 9999
replace uhrswork1 = . if uhrswork1 > 996
replace hourwage  = . if hourwage  > 999

* Paid hourly (paidhour == 2) but no hourly wage on record:
* impute it as weekly earnings divided by usual weekly hours
replace hourwage = earnweek / uhrswork1 if paidhour == 2 & mi(hourwage)

* Hourly wage: the (possibly imputed) hourly wage when paidhour == 2,
* weekly earnings over usual hours when paidhour == 1, missing otherwise
gen wage = cond(paidhour == 2, hourwage, cond(paidhour == 1, earnweek / uhrswork1, .))

* Clamp non-missing wages to the interval [5, 100]; missing wages stay missing
replace wage = max(5, min(wage, 100)) if !mi(wage)

* Generate CPS cumulative distribution
* Weighted by earnwt; analytic weights together with the equal option (tied wages
* share one cumulative value) are used to be equivalent to pw
cumul wage [aw=earnwt], gen(cumwage) equal

*-------------------------------------------------------------------------------
* 3 - Export scalars
*-------------------------------------------------------------------------------

* Export scalars
* Remove any previous version of the output file before new entries are written
local yaml_path "${root}/results/paper numbers/`category'/Earnin CPS comparison.yaml"
capture erase "`yaml_path'"

* Finding where Earnin median and interquartile range fall in CPS distribution
foreach pct in p25 p50 p75 {
	* Largest cumulative share among CPS wages strictly below the Earnin wage,
	* expressed in percent and rounded to the nearest integer
	summarize cumwage if wage < `earnin_wage_`pct''
	local cps_wage_`pct' = round(100 * `r(max)', 1)

	* The Earnin wage itself
	yamlout using "`yaml_path'", ///
		key("earnin_wage_`pct'") ///
		comment("Wage at `pct' of the Earnin wage distribution") ///
		value(`earnin_wage_`pct'') fmt(%4.2f)

	* Its position in the CPS wage distribution
	yamlout using "`yaml_path'", ///
		key("cps_wage_`pct'") ///
		comment("CPS wage percentile at `pct' of the Earnin wage distribution") ///
		value(`cps_wage_`pct'')
}

* Declare the YAML file as an output of this do-file
project, creates("`yaml_path'")
